Imports


In [24]:
%pylab inline

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

try:
    from future.builtins import (bytes, str, open, super, range,
                                 zip, round, input, int, pow, object)
except:
    pass

# ---- Standard Libraries not included in pylab
import collections
import glob
import json
import random
import time
from StringIO import StringIO

# ---- Extra Libraries for additional functionality
import elasticsearch
from elasticsearch import Elasticsearch
es = Elasticsearch(['http://search-01.ec2.internal:9200'])


Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['bytes', 'random']
`%matplotlib` prevents importing * from pylab and numpy

Look at the Current Mapping


In [25]:
mapping = es.indices.get_mapping("gsod")
obs_mapping = collections.OrderedDict(mapping['gsod']['mappings']['observation']['properties'])
for ob_params in obs_mapping:
    print("{:18s}{}".format(ob_params, obs_mapping[ob_params]))


Num of Obs        {u'type': u'string'}
Wind Speed        {u'type': u'string'}
FRSHTT            {u'type': u'string'}
Snow Depth        {u'type': u'string'}
Gust              {u'type': u'string'}
SLP               {u'type': u'string'}
Mean Temp         {u'type': u'string'}
Max Wind Speed    {u'type': u'string'}
Max Temp          {u'type': u'string'}
STP               {u'type': u'string'}
Min Temp          {u'type': u'string'}
WBAN              {u'type': u'string'}
Date              {u'type': u'date', u'format': u'dateOptionalTime'}
Station Id        {u'type': u'string'}
Dew Point         {u'type': u'string'}
Precipitation     {u'type': u'string'}
Visibility        {u'type': u'string'}

Create new mapping for a year


In [26]:
%%bash

curl -XPUT 'http://search-01.ec2.internal:9200/gsod_1929'
curl -XPUT 'http://search-01.ec2.internal:9200/gsod_1929/_mapping/observation' -d '
{
    "observation": {
        "properties": {
            "Num of Obs": {"type": "integer"},
            "Wind Speed": {"type": "float"},
            "FRSHTT": {"type": "string"},
            "Snow Depth": {"type": "float"},
            "Gust": {"type": "float"},
            "SLP": {"type": "float"},
            "Mean Temp": {"type": "float"},
            "Max Wind Speed": {"type": "float"},
            "Max Temp": {"type": "float"},
            "STP": {"type": "float"},
            "Min Temp": {"type": "float"},
            "WBAN": {"type": "string"},
            "Date": {"type": "date", "format": "dateOptionalTime"},
            "Station Id": {"type": "string"},
            "Dew Point": {"type": "float"},
            "Precipitation": {"type": "float"},
            "Visibility": {"type": "float"}
        }
    }
}'


{"error":"RemoteTransportException[[search-06][inet[/172.31.49.239:9300]][indices:admin/create]]; nested: IndexAlreadyExistsException[[gsod_1929] already exists]; ","status":400}{"acknowledged":true}
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   178  100   178    0     0  27397      0 --:--:-- --:--:-- --:--:-- 44500
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   849  100    21  100   828   2049  80804 --:--:-- --:--:-- --:--:-- 92000

In [27]:
mapping = es.indices.get_mapping("gsod_1929")
obs_mapping = collections.OrderedDict(mapping['gsod_1929']['mappings']['observation']['properties'])
for ob_params in obs_mapping:
    print("{:18s}{}".format(ob_params, obs_mapping[ob_params]))


Num of Obs        {u'type': u'integer'}
Wind Speed        {u'type': u'float'}
FRSHTT            {u'type': u'string'}
Snow Depth        {u'type': u'float'}
Gust              {u'type': u'float'}
SLP               {u'type': u'float'}
Mean Temp         {u'type': u'float'}
Max Wind Speed    {u'type': u'float'}
Max Temp          {u'type': u'float'}
STP               {u'type': u'float'}
Min Temp          {u'type': u'float'}
WBAN              {u'type': u'string'}
Date              {u'type': u'date', u'format': u'dateOptionalTime'}
Station Id        {u'type': u'string'}
Dew Point         {u'type': u'float'}
Precipitation     {u'type': u'float'}
Visibility        {u'type': u'float'}

In [10]:
# This is an example of deleting an index
# %%bash

# curl -XDELETE 'http://search-01.ec2.internal:9200/gsod_1929'

In [63]:
query = json.dumps({
    "query": {
        "match": { 
            "Mean Temp": "89.3"
        }
    },
    "_source": "Max Temp",
    "size": num_of_returns
})

es.search(index="gsod", body=query)


Out[63]:
{u'_shards': {u'failed': 0, u'successful': 5, u'total': 5},
 u'hits': {u'hits': [{u'_id': u'AUvBIbSjDzV4m8XpLaJ1',
    u'_index': u'gsod',
    u'_score': 9.448398,
    u'_source': {u'Max Temp': u'98.6'},
    u'_type': u'observation'}],
  u'max_score': 9.448398,
  u'total': 27556},
 u'timed_out': False,
 u'took': 30}

In [2]:
import es_gsod

gsod = es_gsod.ESGsod()
gsod.search_by_date("1970-09-24", "1970-09-25", 2, "Mean Temp")


Out[2]:
{u'_shards': {u'failed': 0, u'successful': 5, u'total': 5},
 u'hits': {u'hits': [{u'_id': u'AUvB4D-ly5q7zM_GbSn4',
    u'_index': u'gsod',
    u'_score': 1.0,
    u'_source': {u'Mean Temp': u'42.2'},
    u'_type': u'observation'},
   {u'_id': u'AUvB4D-uy5q7zM_GbSoE',
    u'_index': u'gsod',
    u'_score': 1.0,
    u'_source': {u'Mean Temp': u'37.0'},
    u'_type': u'observation'}],
  u'max_score': 1.0,
  u'total': 4726},
 u'timed_out': False,
 u'took': 8}